library(ggplot2)
library(ggpubr)
Loading required package: magrittr
library(CDM)
Loading required package: mvtnorm
**********************************
** CDM 7.3-17 (2019-03-18 18:33:40)
** Cognitive Diagnostic Models **
**********************************
library(boot)
library(tidyverse)
[30m-- [1mAttaching packages[22m --------------------------------------- tidyverse 1.2.1 --[39m
[30m[32mv[30m [34mtibble [30m 2.1.1 [32mv[30m [34mpurrr [30m 0.3.2
[32mv[30m [34mtidyr [30m 0.8.3 [32mv[30m [34mdplyr [30m 0.8.0.[31m1[30m
[32mv[30m [34mreadr [30m 1.3.1 [32mv[30m [34mstringr[30m 1.4.0
[32mv[30m [34mtibble [30m 2.1.1 [32mv[30m [34mforcats[30m 0.4.0 [39m
[30m-- [1mConflicts[22m ------------------------------------------ tidyverse_conflicts() --
[31mx[30m [34mtidyr[30m::[32mextract()[30m masks [34mmagrittr[30m::extract()
[31mx[30m [34mdplyr[30m::[32mfilter()[30m masks [34mstats[30m::filter()
[31mx[30m [34mdplyr[30m::[32mlag()[30m masks [34mstats[30m::lag()
[31mx[30m [34mpurrr[30m::[32mset_names()[30m masks [34mmagrittr[30m::set_names()[39m
library(dummy)
dummy 0.1.3
dummyNews()
library(stringi)
library(stringr)
rm(list = ls())
x_pre <- read_csv("../data/FirstYearProject/OUTPUT.csv")
Parsed with column specification:
cols(
.default = col_character(),
SubjectID = [32mcol_double()[39m,
`Auto Score 1` = [32mcol_double()[39m,
`Auto Score 2` = [32mcol_double()[39m,
`Auto Score 3` = [32mcol_double()[39m,
`Auto Score 4` = [32mcol_double()[39m,
`Auto Score 5` = [32mcol_double()[39m,
`Auto Score 6` = [32mcol_double()[39m,
`Auto Score 7` = [32mcol_double()[39m,
`Auto Score 8` = [32mcol_double()[39m,
`Auto Score 9` = [32mcol_double()[39m,
`Auto Score 10` = [32mcol_double()[39m,
`Auto Score 11` = [32mcol_double()[39m,
`Auto Score 12` = [32mcol_double()[39m,
`Auto Score 13` = [32mcol_double()[39m,
`Auto Score 14` = [32mcol_double()[39m,
`Auto Score 15` = [32mcol_double()[39m,
`Auto Score 16` = [32mcol_double()[39m,
`Auto Score 17` = [32mcol_double()[39m,
`Auto Score 18` = [32mcol_double()[39m,
`Auto Score 19` = [32mcol_double()[39m
# ... with 34 more columns
)
See spec(...) for full column specifications.
Q_from_book <- read_csv("../data/FirstYearProject/final_result_similar.csv") #%>% mutate(`Learning Objective` = `Topic`)
Parsed with column specification:
cols(
Question = [31mcol_character()[39m,
Option1 = [31mcol_character()[39m,
Option2 = [31mcol_character()[39m,
Option3 = [31mcol_character()[39m,
Option4 = [31mcol_character()[39m,
Answer = [31mcol_character()[39m,
`Learning Objective` = [31mcol_character()[39m,
Topic = [31mcol_character()[39m,
`Difficulty Level` = [31mcol_character()[39m,
`Skill Level` = [31mcol_character()[39m,
`APA Learning Objective` = [31mcol_character()[39m,
alpha = [31mcol_character()[39m
)
Q_from_book <- Q_from_book %>%
mutate(`Learning Objective` = str_trim(str_remove_all(`alpha`, "\\."))) %>%
filter(`Learning Objective` != "nan")
glimpse(Q_from_book)
Observations: 1,006
Variables: 12
$ Question [3m[38;5;246m<chr>[39m[23m "Which of the following is an example of social influence?", "Which of the following is an example of a direct persuasion attem...
$ Option1 [3m[38;5;246m<chr>[39m[23m "a. You feel guilty because you lied to your trusting professor about your assignment.", "a. A bully threatens Billy and steals...
$ Option2 [3m[38;5;246m<chr>[39m[23m "b. When you get hungry, you have trouble concentrating.", "b. Ramona works hard in school to make her mother proud.", "b. A se...
$ Option3 [3m[38;5;246m<chr>[39m[23m "c. You didn\u0092t do well on the test because you stayed up all night cramming.", "c. Marianne thinks of her ex-boyfriend and...
$ Option4 [3m[38;5;246m<chr>[39m[23m "d. You almost fall asleep at the wheel, so you pull off the road to take a short nap.", "d. Jason moves from New York to Atlan...
$ Answer [3m[38;5;246m<chr>[39m[23m "A", "A", "D", "C", "A", "C", "C", "B", "D", "D", "C", "B", "C", "A", "B", "C", "D", "D", "D", "C", "A", "B", "D", "B", "D", "B...
$ `Learning Objective` [3m[38;5;246m<chr>[39m[23m "Understand the Concepts--11 Describe key concepts, principles, and overarching themes in psychology", "Understand the Concepts...
$ Topic [3m[38;5;246m<chr>[39m[23m "Defining Social Psychology", "Defining Social Psychology", "Defining Social Psychology", "Defining Social Psychology", "Defini...
$ `Difficulty Level` [3m[38;5;246m<chr>[39m[23m "Moderate", "Moderate", "Moderate", "Moderate", "Easy", "Moderate", "Moderate", "Moderate", "Moderate", "Easy", "Difficult", "D...
$ `Skill Level` [3m[38;5;246m<chr>[39m[23m "Understand the Concepts", "Understand the Concepts", "Understand the Concepts", "Understand the Concepts", "Remember the Facts...
$ `APA Learning Objective` [3m[38;5;246m<chr>[39m[23m "1.1 Describe key concepts, principles, and overarching themes in psychology.", "1.1 Describe key concepts, principles, and ove...
$ alpha [3m[38;5;246m<chr>[39m[23m "Understand the Concepts--1.1 Describe key concepts, principles, and overarching themes in psychology.", "Understand the Concep...
Q_from_book %>% distinct(`Skill Level`)
NA
learning_obj <- Q_from_book %>%
distinct(`Learning Objective`) %>%
mutate(lo_id = row_number())
Q_pre <- Q_from_book %>% inner_join(learning_obj) %>% select(Question, `Learning Objective`, lo_id) %>% mutate(temp = str_trim(str_replace_all(Question, "_|\\.", "")))
Joining, by = "Learning Objective"
learning_obj
Q_pre <- Q_from_book %>% inner_join(learning_obj) %>% select(Question, `Learning Objective`, lo_id) %>%
mutate(temp = str_trim(str_replace_all(Question, "_|\\.", ""))) %>%
mutate(Q_UNIQUE_ID = row_number())
Joining, by = "Learning Objective"
Q_pre
NA
head(x_pre)
NA
x.gather <-x_pre %>% gather(key = "key", value = "value", -File, -SubjectID)
x.gather
x.questions <-
x.gather %>% filter(str_detect(key, "Question")) %>%
anti_join(
x.gather %>% filter(str_detect(key, "Question")) %>%
group_by(File, SubjectID, value) %>%
summarise(cnt = n(), question_number = paste(key, collapse = ",")) %>%
filter(cnt > 1) %>% ungroup(),
by = "value"
) # Taking out generic questions (having same question text but different answers)
x.questions.dist <- x.questions %>% distinct(value) %>% drop_na() %>%
#mutate(Q_UNIQUE_ID = row_number()) %>%
mutate(temp = str_trim(str_replace_all(value, "_|\\.", ""))) %>%
inner_join(
Q_pre, by = "temp"
)
x.questions.dist %>% write_csv("../data/FirstYearProject/Q_distinct_id.csv")
x.questions.dist
Q <- x.questions.dist %>% distinct(Q_UNIQUE_ID, lo_id) %>% arrange(Q_UNIQUE_ID) %>%
mutate(present = 1) %>%
spread(key = "lo_id", value = "present")
Q %>%
mutate_all(function(x) ifelse(is.na(x), 0, x)) %>%
write_csv("../data/FirstYearProject/Q.csv")
Q
NA
x.answers <-
x.gather %>% filter(!str_detect(key, "Question"))
x.answers
#Total Questions presented to students 53 Questions are randomly presented to students
x.questions %>% distinct(key)
x.questions.id <- x.questions %>% inner_join(x.questions.dist) #%>% mutate(Q_UNIQUE_ID = factor(Q_UNIQUE_ID))
Joining, by = "value"
x.questions.id
Questions with same text but different Answers
x.questions.id.filterd <- x.questions.id %>%
anti_join(
x.questions.id %>%
group_by(File, SubjectID, Question) %>%
summarise(cnt = n(), question_number = paste(key, collapse = ",")) %>%
filter(cnt > 1) %>% ungroup(),
by = "Question"
) %>% select(-lo_id, -`Learning Objective`)
x.questions.id.filterd
NA
We have the correct Questions. Now we need to add marks of answers against the questions.
X.pre <- x.questions.id %>% mutate(id = str_split(key, " ", simplify = TRUE)[,2]) %>%
inner_join(
x.answers %>% mutate(id = str_split(key, " ", simplify = TRUE)[,3]), by = c("File", "SubjectID", "id")
) %>%
mutate(value.y = as.integer(value.y)) #%>%
#mutate(Q_UNIQUE_ID = as.integer(Q_UNIQUE_ID))
#write_csv(X.pre, "X_Pre.csv")
X.pre
unique(X.pre$Q_UNIQUE_ID)
[1] 19 182 247 300 118 91 114 88 82 9 260 105 78 13 261 49 17 148 274 289 40 166 66 202 225 177 126 204 140 294 116 136
[33] 186 223 149 229 122 282 137 64 292 67 33 30 272 505 974 481 980 478 989 494 497 461 523 466 404 377 442 355 530 515 475 431
[65] 440 447 349 417 324 357 474 502 994 516 372 332 498 381 506 369 344 521 346 468 370 988 484 911 413 419 606 963 602 592 563 614
[97] 550 607 593 612 558 943 947 559 661 935 912 567 659 557 932 926 647 629 554 578 572 641 619 648 920 962 965 555 817 732 814 870
[129] 810 866 837 761 844 665 816 849 786 842 727 879 822 860 710 901 694 760 888 883 874 767 780 739 765 715 788 856 755 750 823 713
[161] 666 664 667 722 663 691 668 909 1004 671 695 830 835 796 815 227 232 57 56 119 262 127 100 196 213 93 70 307 46 278 224 85
[193] 69 47 198 90 183 98 504 514 380 388 326 318 995 340 444 414 433 455 462 405 493 378 411 379 327 358 457 338 469 427 323 351
[225] 416 361 415 425 510 441 446 970 459 408 356 496 531 337 487 984 649 547 928 562 924 918 613 541 634 616 583 556 545 914 537 916
[257] 945 931 927 954 633 930 644 605 625 961 655 959 951 683 839 868 717 897 867 904 850 751 821 673 908 707 776 749 689 807 669 730
[289] 889 852 910 728 898 834 838 800 764 775 828 890 726 806 742 803 716 746 847 752 895 670 887 768 896 902 226 79 150 172 22 258
[321] 102 208 298 187 155 180 32 65 215 43 129 157 197 128 199 156 113 27 221 268 84 145 3 256 51 290 176 464 527 368 335 450
[353] 526 350 407 977 373 352 347 463 979 313 993 503 997 422 403 430 320 394 409 490 328 316 981 434 477 376 410 353 429 533 341 973
[385] 524 975 624 937 958 535 917 588 967 925 934 599 922 597 571 630 660 579 565 610 637 636 848 733 851 736 685 674 843 785 884 802
[417] 737 873 787 784 805 906 724 735 841 778 853 846 863 811 793 789 708 684 682 876 692 677 743 779 832 813 782 827 758 16 147 117
[449] 277 212 115 301 95 165 255 194 87 50 45 311 144 241 170 15 10 273 7 25 216 131 219 134 106 276 41 299 266 458 365 476
[481] 512 321 374 972 319 399 363 978 389 443 485 982 436 460 334 420 969 976 511 371 401 322 618 627 621 587 941 553 640 568 544 561
[513] 543 651 913 654 608 626 546 600 622 603 598 942 631 905 718 719 681 672 881 783 878 771 734 829 706 687 891 877 894 781 770 792
[545] 688 679 797 690 799 794 686 192 112 161 167 97 305 251 302 38 11 164 81 263 77 143 68 293 63 179 364 473 339 428 354 489
[577] 465 499 362 529 509 480 520 375 486 406 387 623 642 580 938 936 609 575 638 596 586 574 538 946 549 652 923 594 929 745 753 899
[609] 693 702 831 840 680 880 903 865 886 774 141 175 107 101 14 244 824 825 826 998 999 1000 1003 163 89 71 248 222 242 132 162 108
[641] 205 158 201 304 18 96 169 252 193 142 171 501 445 456 470 513 400 528 495 479 488 325 519 397 412 500 657 581 949 955 611 632
[673] 956 551 643 919 615 953 948 560 808 712 705 791 872 714 907 836 754 744 729 748 820 678 675 92 217 188 214 200 2 103 21 206
[705] 253 195 235 86 26 109 230 308 437 491 448 392 385 395 508 585 650 646 617 645 548 584 939 577 662 566 582 966 812 738 854 885
[737] 861 855 801 777 701 833 900 723 703 55 234 209 53 267 174 231 236 207 271 303 243 94 452 421 383 532 432 472 453 518 359 482
[769] 628 915 595 635 944 964 591 552 763 875 697 893 772 869 762 190 280 120 24 159 246 74 110 31 284 111 168 29 310 366 382 317
[801] 331 402 384 992 418 507 454 522 539 653 933 540 798 1005 759 700 795 731 725 747 871 864 699 809 178 154 39 285 281 160 238 1
[833] 4 28 5 8 42 525 314 492 333 423 343 604 542 845 766 859 819 23 76 138 73 135 37 426 439 983 348 564 601 576 590 921
[865] 773 704 709 804 279 211 259 83 104 44 391 534 656 573 862 769 80 233 264 185 296 12 367 396 390 987 658 892 790 72 99 287
[897] 151 275 306 449 336 950 756 250 152 451 882 36 257 424 639 698 740 283 146 60 61 398 345 75 62 297 6 985 996 393 940 124
[929] 270 309 952 52 48 218 240 125 467 210 471 858 181 121 191 696 139 249 20 288 220
X<- X.pre %>% select(-key.x, -key.y, -value.x, -id, -temp, -lo_id, -`Learning Objective`, -Question ) %>%
spread(key = "Q_UNIQUE_ID", value = "value.y")
write_csv(X, "../data/FirstYearProject/X.csv")
X
Let’s run some test to verify X
X %>% select(-File, -SubjectID) %>% summarise_all(sum, na.rm = TRUE)
NA
X %>% gather(key = "QuestionID", value = "Score", -File, -SubjectID)
NA
library(janitor)
X %>% filter(File == "Exam1Trial1") %>% remove_empty(.,which = "cols")
NA
question_attempted <- X %>% remove_empty(.,which = "cols") %>%
gather(key = "QuestionID", value = "Scores", -File, -SubjectID) %>%
group_by(File, QuestionID) %>%
summarise(total_na = sum(is.na(Scores)), total = n(), total_attempted = total - total_na)
question_attempted <- question_attempted %>% filter(total_attempted >= 8)
question_attempted
#%>% filter(QuestionID == "103")
Filtering out questions with lesser attempts
X_filtered <- X %>% remove_empty(.,which = "cols") %>%
gather(key = "QuestionID", value = "Scores", -File, -SubjectID) %>% semi_join(question_attempted, by = c("File", "QuestionID")) %>%
spread(key = "QuestionID", value = "Scores")
X_filtered
X %>% remove_empty(.,which = "cols") %>% write_csv("../data/FirstYearProject/X.csv")
X_filtered %>% remove_empty(.,which = "cols") %>% write_csv("../data/FirstYearProject/X_filtered.csv")
Write CSVs seperate for each trial to avoid having columns for those questions that were not asked in a trial. This will help to show the true picture of sparsity.
fn.clean <- function (df) {
return(df %>% remove_empty(.,which = "cols"))
}
X.individual.list <- X %>%
nest(-File, .key = "X_full") %>%
mutate(X = map(X_full, fn.clean),
Q_full = map(X_full, function(df) return (Q)))
X.individual.list
[38;5;246m# A tibble: 8 x 4[39m
File X_full X Q_full
[3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<list>[39m[23m [3m[38;5;246m<list>[39m[23m [3m[38;5;246m<list>[39m[23m
[38;5;250m1[39m Exam1Trial1 [38;5;246m<tibble [74 x 950]>[39m [38;5;246m<tibble [74 x 295]>[39m [38;5;246m<tibble [949 x 19]>[39m
[38;5;250m2[39m Exam1Trial2 [38;5;246m<tibble [57 x 950]>[39m [38;5;246m<tibble [57 x 286]>[39m [38;5;246m<tibble [949 x 19]>[39m
[38;5;250m3[39m Exam2Trial1 [38;5;246m<tibble [66 x 950]>[39m [38;5;246m<tibble [66 x 237]>[39m [38;5;246m<tibble [949 x 19]>[39m
[38;5;250m4[39m Exam2Trial2 [38;5;246m<tibble [67 x 950]>[39m [38;5;246m<tibble [67 x 238]>[39m [38;5;246m<tibble [949 x 19]>[39m
[38;5;250m5[39m Exam3Trial1 [38;5;246m<tibble [47 x 950]>[39m [38;5;246m<tibble [47 x 178]>[39m [38;5;246m<tibble [949 x 19]>[39m
[38;5;250m6[39m Exam3Trial2 [38;5;246m<tibble [78 x 950]>[39m [38;5;246m<tibble [78 x 179]>[39m [38;5;246m<tibble [949 x 19]>[39m
[38;5;250m7[39m Exam4Trial1 [38;5;246m<tibble [64 x 950]>[39m [38;5;246m<tibble [64 x 240]>[39m [38;5;246m<tibble [949 x 19]>[39m
[38;5;250m8[39m Exam4Trial2 [38;5;246m<tibble [72 x 950]>[39m [38;5;246m<tibble [72 x 240]>[39m [38;5;246m<tibble [949 x 19]>[39m
X %>% filter(File == "Exam1Trial1") %>% remove_empty(.,which = "cols")
NA
Q
NA
fn.skills <- function (df) {
df <- df %>% remove_empty(.,which = "cols") %>%
gather(key = "Q_UNIQUE_ID", value = "Score", -SubjectID) %>%
mutate(Q_UNIQUE_ID = as.integer(Q_UNIQUE_ID)) %>% distinct(Q_UNIQUE_ID) %>%
inner_join(
Q
) %>% remove_empty(.,which = "cols") %>% mutate_all(function(x) ifelse(is.na(x), 0, x))
return(df)
}
X.Q <- X.individual.list %>%
mutate(Q = map(X, fn.skills))
Joining, by = "Q_UNIQUE_ID"
Joining, by = "Q_UNIQUE_ID"
Joining, by = "Q_UNIQUE_ID"
Joining, by = "Q_UNIQUE_ID"
Joining, by = "Q_UNIQUE_ID"
Joining, by = "Q_UNIQUE_ID"
Joining, by = "Q_UNIQUE_ID"
Joining, by = "Q_UNIQUE_ID"
X.Q
[38;5;246m# A tibble: 8 x 5[39m
File X_full X Q_full Q
[3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<list>[39m[23m [3m[38;5;246m<list>[39m[23m [3m[38;5;246m<list>[39m[23m [3m[38;5;246m<list>[39m[23m
[38;5;250m1[39m Exam1Trial1 [38;5;246m<tibble [74 x 950]>[39m [38;5;246m<tibble [74 x 295]>[39m [38;5;246m<tibble [949 x 19]>[39m [38;5;246m<tibble [294 x 15]>[39m
[38;5;250m2[39m Exam1Trial2 [38;5;246m<tibble [57 x 950]>[39m [38;5;246m<tibble [57 x 286]>[39m [38;5;246m<tibble [949 x 19]>[39m [38;5;246m<tibble [285 x 15]>[39m
[38;5;250m3[39m Exam2Trial1 [38;5;246m<tibble [66 x 950]>[39m [38;5;246m<tibble [66 x 237]>[39m [38;5;246m<tibble [949 x 19]>[39m [38;5;246m<tibble [236 x 7]>[39m
[38;5;250m4[39m Exam2Trial2 [38;5;246m<tibble [67 x 950]>[39m [38;5;246m<tibble [67 x 238]>[39m [38;5;246m<tibble [949 x 19]>[39m [38;5;246m<tibble [237 x 7]>[39m
[38;5;250m5[39m Exam3Trial1 [38;5;246m<tibble [47 x 950]>[39m [38;5;246m<tibble [47 x 178]>[39m [38;5;246m<tibble [949 x 19]>[39m [38;5;246m<tibble [177 x 4]>[39m
[38;5;250m6[39m Exam3Trial2 [38;5;246m<tibble [78 x 950]>[39m [38;5;246m<tibble [78 x 179]>[39m [38;5;246m<tibble [949 x 19]>[39m [38;5;246m<tibble [178 x 4]>[39m
[38;5;250m7[39m Exam4Trial1 [38;5;246m<tibble [64 x 950]>[39m [38;5;246m<tibble [64 x 240]>[39m [38;5;246m<tibble [949 x 19]>[39m [38;5;246m<tibble [239 x 6]>[39m
[38;5;250m8[39m Exam4Trial2 [38;5;246m<tibble [72 x 950]>[39m [38;5;246m<tibble [72 x 240]>[39m [38;5;246m<tibble [949 x 19]>[39m [38;5;246m<tibble [239 x 6]>[39m
X %>% filter(File == "Exam2Trial2") %>% remove_empty(.,which = "cols") %>%
gather(key = "Q_UNIQUE_ID", value = "Score", -File, -SubjectID) %>%
mutate(Q_UNIQUE_ID = as.integer(Q_UNIQUE_ID)) %>% distinct(Q_UNIQUE_ID) %>%
inner_join(
Q, by = "Q_UNIQUE_ID"
) %>% remove_empty(.,which = "cols") %>% mutate_all(function(x) ifelse(is.na(x), 0, x)) %>% summarise_all(sum)
[38;5;246m# A tibble: 1 x 7[39m
Q_UNIQUE_ID `1` `2` `4` `15` `16` `19`
[3m[38;5;246m<int>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m [4m1[24m[4m1[24m[4m5[24m028 97 70 67 1 1 1
X %>% filter(File == "Exam1Trial1")
NA
fn.write <- function(File, X_full, X, Q_full, Q) {
print(X)
X %>% write_csv(paste0("../data/FirstYearProject/",File,"_X.csv"))
Q %>% write_csv(paste0("../data/FirstYearProject/",File,"_Q.csv"))
}
#walk2(X.Q$File, X.Q$data_clean, X.Q$data_Q_skills, fn.write)
pwalk(X.Q, fn.write)
NA
NA